##############################################################################
# Main Analysis I
###############################################################################
# All Models reported in Main Text including their Tables
###############################################################################
# Content
###############################################################################
# 1) Dependencies
# 2) Load Data
# 3) Generate aggregated hits and documents per candidates for 2015 vs 2019
# 4) Models
# 5) Save Model Output in Tables
###############################################################################
# 1) Dependencies
###############################################################################
library(readr)
library(dplyr)
library(ggplot2)
library(gganimate)
library(ggeffects)
library(ggExtra)
library(ggridges)
library(ggrepel)
library(grid)
library(scales)
library(lubridate)
library(extrafont)
library(reshape2)
library(here)
library(ggforce)
library(png)
library(readxl)
library(grid)
library(gridExtra)
library(ggpubr)
library(sjPlot)
library(ggplot2)
library(lme4)
library(effects)
library(texreg)
library(MASS)
###############################################################################
# 2) Load Data
###############################################################################
# Set Path
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
rm(list=ls())

# Options and Seed
options(stringsAsFactors = F)
set.seed(0213)

# Custom functions
# ggplot rescale x axis....
scale_x_reordered <- function(..., sep = "___") {
  reg <- paste0(sep, ".+$")
  ggplot2::scale_x_discrete(labels = function(x) gsub(reg, "", x), ...)
}
# ggplot order over facets...
reorder_within <- function(x, by, within, fun = mean, sep = "___", ...) {
  new_x <- paste(x, within, sep = sep)
  stats::reorder(new_x, by, FUN = fun)
}

# Source Theme for Figures
source('ggplot_theme_ddl.R', encoding = "UTF-8")

# Load Data
df <- readRDS("../data/smd_ner_2015_2019_combined.RDS")
candidates_list_15 <- read.csv('../support/candidates-2015/00-Named_Entity_List_withID.csv', stringsAsFactors = F) %>% 
  as_tibble %>% mutate(id=as.character(id))
candidates_list_19 <- read.csv('../support/candidates-2019/00-Named_Entity_List_withID.csv', stringsAsFactors = F) %>% 
  as_tibble %>% mutate(id=as.character(id))

candidates_list_19 <- candidates_list_19 %>% mutate(candidacy = as.character(gsub("\\s", " ", council))) %>% 
  mutate(council = case_when(candidacy %in% c("SR", "Former Staenderat", "Former Staenderat") ~ "sr",
                             candidacy %in% c("NR", "Former Nationalrat", "Former Nationalrat") ~  "nr",
                             candidacy %in% c("SR und NR", "NR und SR") ~ "sr & nr")) %>% 
  dplyr::select(-c(candidacy))

df$year <- as.character(df$year)
df$date <- format(as.Date(df$date, "%m-%d"), format = "%m-%d")
df$fullname <- ifelse(df$fullname == "Adèle Goumaz", "Adèle Thorens Goumaz", df$fullname)
df$fullname <- ifelse(df$fullname == "Niklaus-Samuel Gugger", "Nik Gugger", df$fullname)
df$incumbent <- ifelse(df$fullname == "Philipp Müller", 1, df$incumbent)

#-----------------------------------------------------------------------------#
# Configurations
#-----------------------------------------------------------------------------#
# Remove Federal Councilors
council <- TRUE
# Remove Party Presidents
president <- TRUE

# Unwanted Topics
unwanted_topics <- c('PoliticalSystem', 'Other_unclassified_Political_Texts', 
                     'NotPolitical', 'Not Classified', 'Other_Problems')

# Council members 2015
council_15 <- c("Ueli Maurer", "Alain Berset", "Didier Burkhalter", 
                "Simonetta Sommaruga", "Eveline Widmer Schlumpf", 
                "Johann Schneider-Ammann", "Doris Leuthard")

# Council members 2019
council_19 <- c("Ueli Maurer", "Alain Berset", "Ignazio Cassis", 
                "Simonetta Sommaruga", "Guy Parmelin",
                "Karin Keller-Sutter", "Viola Amherd")

# Party Presidents 2015
presi_15 <- c("Toni Brunner", "Christian Levrat", "Philipp Müller", 
              "Christophe Darbellay", "Regula Rytz", "Martin Bäumle", "Martin Landolt")

# Party Presidents 2019
presi_19 <- c("Albert Rösti", "Christian Levrat", "Petra Gössi", 
              "Gerhard Pfister", "Regula Rytz", "Jürg Grossen", "Martin Landolt")

# Remove Council Members:
if(council == T){
  df <- df %>% dplyr::filter((year == "2015" & !fullname %in% council_15) |
                               (year == "2019" & !fullname %in% council_19))
}


# Remove Party Presidents:
if(president == T){
  df <- df %>% dplyr::filter((year == "2015" & !fullname %in% presi_15) |
                               (year == "2019" & !fullname %in% presi_19))
}

# Values for colors
values_gender_1 <- c("f" = "#DD2461", "m" = "#7D7D7C")
shapes_gender_1 <- c("f" = 1, "m" = 5)
.fill2 <- unlist(colourList[['colour']][['parties']])
names(.fill2) <- toupper(names(.fill2))
.fill2[22] <- "grey"
names(.fill2) <- c(names(.fill2)[1:21],"Rest")
values_year_2 <- c("2015" = "#DD2461", "2019" = "#7D7D7C")
values_gender_2 <- c("Women" = "#DD2461", "Men" = "#7D7D7C")
###############################################################################
# 3) Generate aggregated hits and documents per candidates for 2015 vs 2019
###############################################################################
df_hits_f <- df %>% group_by(year, fullname, person.id, gender, party, canton, list_place_1, age, incumbent, selectsclass, council) %>% summarise(n.hits = n())
unique(df_hits_f$council)

# Check if council NA are just the articles with no mention at all:
help_council_na <- df_hits_f %>% filter(is.na(council) == T)
unique(help_council_na$fullname)

# Add Candidates with zero mentions via the lists of candidates:
names(df_hits_f)
sapply(df_hits_f, mode)

helper <- filter(df_hits_f, year == "2015")
candidates_list_15 <- candidates_list_15 %>% filter(!id %in% helper$person.id)

helper <- filter(df_hits_f, year == "2019")
candidates_list_19 <- candidates_list_19 %>% filter(!id %in% helper$person.id)


candidates_list_15 <- candidates_list_15 %>% dplyr::select(c(age,district,fullname,gender,incumbent,list_place_1,party,id,council)) %>% 
  dplyr::mutate(person.id = id,
                canton = district,
                n.hits = 0,
                selectsclass = NA,
                year = "2015") %>% 
  dplyr::select(-c("district","id"))



candidates_list_19 <- candidates_list_19 %>% dplyr::select(c(age,district,fullname,gender,incumbent,list_place_1,party,id,council)) %>% 
  dplyr::mutate(person.id = id,
                canton = district,
                n.hits = 0,
                selectsclass = NA,
                year = "2019") %>% 
  dplyr::select(-c("district","id"))

#Check if we have candidates where we don't know the chamber:
unique(candidates_list_15$council)
unique(candidates_list_19$council)

df_hits_f <- dplyr::bind_rows(df_hits_f,candidates_list_15,candidates_list_19)

df_hits_15 <- df_hits_f %>% filter(year == "2015") %>% group_by(fullname, person.id, gender, party, canton, list_place_1, age, incumbent, council) %>%  
  tidyr::complete(selectsclass = unique(df_hits_f$selectsclass), fill = list(n.hits = 0,
                                                                           year = "2015"))
# Complete Selectsclass for each Person in each Year
df_hits_19 <- df_hits_f %>% filter(year == "2019") %>% group_by(fullname, person.id, gender, party, canton, list_place_1, age, incumbent, council) %>%  
  tidyr::complete(selectsclass = unique(df_hits_f$selectsclass), fill = list(n.hits = 0,
                                                                           year = "2019"))
# Sanity Check
length(unique(df_hits_19$person.id))
length(unique(df_hits_15$person.id))

# Combine the completed dfs
df_hits_f <- dplyr::bind_rows(df_hits_15,df_hits_19)

# Remove the Class NA
df_hits_f <- df_hits_f %>% filter(is.na(selectsclass) == F) %>% filter(is.na(person.id) == F)

# Fix the Canton with a Typo!
df_hits_f <- df_hits_f %>% mutate(canton = ifelse(canton == "St.Gallen", "St. Gallen", canton))
sort(unique(df_hits_f$canton))

# Sanity Check 
sanity <- df_hits_f %>% group_by(year, selectsclass) %>% summarise(n = n())
sanity


###############################################################################
# 4) Models
###############################################################################
# Factorize some variables
df_hits_f$party <- as.factor(df_hits_f$party)
df_hits_f$canton <- as.factor(df_hits_f$canton)
df_hits_f$list_place <- 0
df_hits_f$list_place[df_hits_f$list_place_1 %in% c(1,2,3)] <- 1
df_hits_f$age_sq <- df_hits_f$age * df_hits_f$age
df_hits_f$incumbent <- as.factor(df_hits_f$incumbent)
df_hits_f$gender <- as.factor(df_hits_f$gender)

df_hits_agg <- df_hits_f %>% ungroup() %>% dplyr::select(-c(`person.id`)) %>% 
  group_by(year, fullname, gender, party, canton, list_place_1, list_place, age, age_sq, incumbent, council) %>% 
  dplyr::select(-selectsclass) %>%
  summarise_all(sum) %>%
  mutate(standerat=as.factor(ifelse(council == "nr","Nationalrat","Ständerat"))) %>%
  mutate(list_place_1=ifelse(is.na(list_place_1),1,list_place_1)) %>%
  filter(is.na(fullname)==F)

df_hits_2015 <- df_hits_agg %>% filter(year == "2015")
df_hits_2019 <- df_hits_agg %>% filter(year == "2019")

pred_df <- expand.grid(gender=unique(df_hits_agg$gender),
                       year=unique(df_hits_agg$year),
                       party="SP",
                       list_place_1=mean(df_hits_agg$list_place_1,),
                       canton="Zürich",
                       standerat=unique(df_hits_agg$standerat),
                       incumbent=unique(df_hits_agg$incumbent))


# ----------------------------------------------------------------------------#
# Negative Binomial Regression Models
# ----------------------------------------------------------------------------#
hits.base <- glm.nb(n.hits~gender*year+party,data=df_hits_agg)
summary(hits.base)

hits.chamber <- glm.nb(n.hits~gender*year+party+standerat,data=df_hits_agg)
summary(hits.chamber)

hits.controls <- glm.nb(n.hits~gender*year+party+standerat+list_place_1+canton+incumbent,data=df_hits_agg)
summary(hits.controls)


hits.controls_inc <- glm.nb(n.hits~gender*year+party+standerat+list_place_1+canton,data=filter(df_hits_agg, incumbent == 1))
summary(hits.controls_inc)

hits.controls_noinc <- glm.nb(n.hits~gender*year+party+standerat+list_place_1+canton,data=filter(df_hits_agg, incumbent == 0))
summary(hits.controls_noinc)

# ----------------------------------------------------------------------------#
# Negative Binomial Multilevel Regression Models (Canton Only)
# ----------------------------------------------------------------------------#
set.seed(0213)
control = glmerControl(optimizer = "nlminbwrap", optCtrl = list(maxfun = 2e5))

# Scale some Variables for better convergence...
df_hits_agg_scaled <- df_hits_agg

# No scaling, since all variables are not very scaleable!

# Base Model
hits.base.ml <- glmer.nb(n.hits ~ gender*year + party + (1|canton), data=df_hits_agg_scaled)
summary(hits.base.ml)

# Chamber Model
hits.chamber.ml <- glmer.nb(n.hits ~ gender*year + party + standerat + (1|canton), data=df_hits_agg_scaled)
summary(hits.chamber.ml)

# Controls Model
hits.controls.ml <- glmer.nb(n.hits ~ gender*year + party + standerat + list_place_1 + incumbent + (1|canton), data=df_hits_agg_scaled)
summary(hits.controls.ml)

###############################################################################
# 5) Save Model Output in Tables
###############################################################################
htmlreg(list(hits.base,hits.base.ml,hits.controls,hits.controls.ml,hits.controls_inc,hits.controls_noinc),
        file = "../tables_main/baseline_new.html",
        # omit.coef = "(partei_)|(kanton_)"
)


texreg::texreg(list(hits.base,hits.base.ml,hits.controls,hits.controls.ml,hits.controls_inc,hits.controls_noinc),omit.coef="canton|party",#reorder.coef=c(2,3,4,5,6,7,1),
               custom.gof.rows = list("Multilevel" = c("", "\\checkmark", "", "\\checkmark", "", ""), 
                                      "Party FEs" = c("\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark"),
                                      "Cantonal FEs"=c("", "",  "\\checkmark", "", "\\checkmark", "\\checkmark")),
               file="../tables_main/baseline_new.tex",
               custom.coef.map = list("genderm"="Male",
                                      "year2019"="2019",
                                      "genderm:year2019"="Male*2019",
                                      "standeratStänderat"="Council of States",
                                      "list_place_1"="Top list",
                                      "incumbent1"="incumbent",
                                      "(Intercept)"="(Intercept)"),
               label="table:results_mentions",
               float.pos="h",
               caption = "Statistical negative binomial models of important predictors of media coverage, operationalized as the daily frequency of newspaper mentions of female political candidates. The unit of analysis is the idividual candidate. Model 1a is the baseline model controlling for the party effect of the gender gap. Model 2a displays the effect of media coverage with all important predictors, while 2c displays the effect for the same predictors looking only at the incumbents and 2d for all non-incumbents. Both model 1b and model 2b are the same models using a multilevel approach with the cantons as level two variable.")

